# -*- coding: utf-8 -*-
import copy
import random
import scrapy
from scrapy import Request
from scrapy.exceptions import IgnoreRequest
from scrapy.utils.project import get_project_settings
from zc_core.spiders.base import BaseSpider
from esgcc.util.brand_helper import BrandHelper
from zc_core.dao.sku_dao import SkuDao
from zc_core.util.http_util import retry_request
from zc_core.dao.sku_pool_dao import SkuPoolDao
from zc_core.dao.batch_dao import BatchDao
from zc_core.util.done_filter import DoneFilter
from esgcc.rules import *
from esgcc.util.login import SeleniumLogin


class FullSpider(BaseSpider):
    """Full-catalog crawl spider for b.esgcc.com.cn.

    Logs in through Selenium to obtain session cookies, then walks the
    whole SKU pool: one POST per SKU for price/status, plus a follow-up
    product-detail request only when the brand cache has no entry.
    """
    name = 'full'
    # Commonly used URL templates (filled with a SKU/product id via .format()).
    index_url = 'http://b.esgcc.com.cn/showIndex/index.htm'
    item_url = 'http://b.esgcc.com.cn/showDetail/{}'
    prod_detail_url = 'http://b.esgcc.com.cn/products/loadProductDetailInfomationAll?productId={}'
    prod_group_url = 'http://b.esgcc.com.cn/showDetail/getOfficeProductCompare.do?prodId={}'

    def __init__(self, batchNo=None, *args, **kwargs):
        super(FullSpider, self).__init__(batchNo=batchNo, *args, **kwargs)
        # Create the batch record for this crawl run.
        BatchDao().create_batch(self.batch_no)
        # Tracks SKUs already collected in this batch, to avoid duplicates.
        self.done_filter = DoneFilter(self.batch_no)
        # Brand lookup cache; a hit lets us skip the product-detail request.
        self.brand_filter = BrandHelper()

    def start_requests(self):
        """Log in via Selenium and bootstrap the crawl with the index page."""
        cookies = SeleniumLogin().get_cookies()
        if not cookies:
            self.logger.error('init cookie failed...')
            return
        self.logger.info('init cookie: %s', cookies)

        yield Request(
            url=self.index_url,
            meta={
                'reqType': 'catalog',
                'batchNo': self.batch_no,
            },
            headers={
                'Connection': 'keep-alive',
                'Accept': 'text/javascript, application/javascript, application/ecmascript, application/x-ecmascript, */*; q=0.01',
                'X-Requested-With': 'XMLHttpRequest',
                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.25 Safari/537.36 Core/1.70.3741.400 QQBrowser/10.5.3863.400',
                'Accept-Encoding': 'gzip, deflate',
                'Accept-Language': 'zh-CN,zh;q=0.9',
            },
            cookies=cookies,
            callback=self.init_cookie,
            errback=self.error_back,
            priority=200,
        )

    def init_cookie(self, response):
        """Schedule one item request per SKU in the pool.

        Fixes vs. original: the project-settings lookup is hoisted out of
        the per-SKU loop (it is loop-invariant), and the done-filter is
        applied only when FORCE_RECOVER is off — previously the list was
        pre-filtered unconditionally, so FORCE_RECOVER could never actually
        re-collect an already-done SKU, and the per-item re-check of the
        done filter was dead code (removed here).
        """
        settings = get_project_settings()
        max_offline = settings.get('MAX_OFFLINE_TIME', 1)
        force_recover = settings.get('FORCE_RECOVER', False)

        pool_list = SkuPoolDao().get_sku_pool_list()
        self.logger.info('全量：%s' % (len(pool_list)))
        if force_recover:
            # Forced recovery: re-collect everything, including done SKUs.
            dist_list = list(pool_list)
        else:
            dist_list = [x for x in pool_list if not self.done_filter.contains(x.get('_id'))]
        self.logger.info('目标：%s' % (len(dist_list)))
        # Randomize crawl order to spread load across the catalog.
        random.shuffle(dist_list)
        for sku in dist_list:
            sku_id = sku.get('_id')
            # Skip SKUs that have been offline too long (dead listings).
            offline_time = sku.get('offlineTime', 0)
            if offline_time > max_offline:
                self.logger.info('忽略: [%s][%s]', sku_id, offline_time)
                continue

            # Collect the SKU's price and sale status.
            yield Request(
                method='POST',
                url=self.item_url.format(sku_id),
                callback=self.parse_item_data,
                errback=self.error_back,
                meta={
                    'reqType': 'item',
                    'batchNo': self.batch_no,
                    'skuId': sku_id,
                },
                priority=25,
            )

    def parse_item_data(self, response):
        """Parse an item page; emit directly when brand data is cached,
        otherwise follow up with a product-detail request."""
        # NOTE: the bare name `parse_item_data` here resolves to the
        # module-level helper from `esgcc.rules` (class attributes are not
        # in scope inside method bodies), not to this method.
        data = parse_item_data(response)
        sku_id = data.get('skuId')
        self.logger.info('数据: [%s]' % sku_id)

        br_item = self.brand_filter.get(sku_id)
        if br_item:
            # Brand info already cached — enrich and emit without another request.
            data['brandId'] = br_item.get('brandId', '')
            data['brandName'] = br_item.get('brandName', '')
            data['brandModel'] = br_item.get('brandModel', '')
            data['onSaleTime'] = br_item.get('onSaleTime', '')
            self.logger.info('商品1: [%s]' % sku_id)
            yield data
        else:
            # Fetch the product introduction to obtain brand details.
            yield Request(
                method='POST',
                url=self.prod_detail_url.format(sku_id),
                callback=self.parse_prod_detail,
                errback=self.error_back,
                meta={
                    'reqType': 'prod_detail',
                    'skuId': sku_id,
                    # Shallow copy so later top-level mutation of `data`
                    # cannot leak into this request's payload.
                    'data': copy.copy(data),
                },
                priority=50,
            )

    def parse_prod_detail(self, response):
        """Parse the product-introduction response and emit the item."""
        # As above, `parse_prod_detail` is the esgcc.rules module-level parser.
        data = parse_prod_detail(response)
        self.logger.info('商品2: [%s]' % data.get('skuId'))
        yield data
